library(rethinking)
pos <- replicate(1000, sum(runif(16, -1, 1)))
hist(pos)
plot(density(pos))
dens(pos)
# 4.2
prod(1 + runif(12, 0, 0.1))
[1] 1.895617
growth <- replicate(10000, prod(1 + runif(12, 0, 0.1)))
dens(growth, norm.comp = TRUE)
big <- replicate(10000, prod(1 + runif(12, 0, 0.5)))
dens(big, norm.comp = TRUE)
small <- replicate(10000, prod(1 + runif(12, 0, 0.01)))
dens(small, norm.comp = TRUE)
# 4.5
log.big <- replicate(10000, log(prod(1 + runif(12, 0, 0.5))))
dens(log.big, norm.comp = TRUE)
# 4.6
w <- 6
n <- 9
p_grid <- seq(from = 0, to = 1, length.out = 100)
posterior <- dbinom(w, n, p_grid) * dunif(p_grid, 0, 1)
posterior <- posterior/sum(posterior)
library(rethinking)
data("Howell1")
d <- Howell1
d
str(d)
'data.frame': 544 obs. of 4 variables:
$ height: num 152 140 137 157 145 ...
$ weight: num 47.8 36.5 31.9 53 41.3 ...
$ age : num 63 63 65 41 51 35 32 27 19 54 ...
$ male : int 1 0 0 1 0 1 0 1 0 1 ...
d$height
[1] 151.7650 139.7000 136.5250 156.8450 145.4150 163.8300 149.2250 168.9100 147.9550 165.1000
[11] 154.3050 151.1300 144.7800 149.9000 150.4950 163.1950 157.4800 143.9418 121.9200 105.4100
[21] 86.3600 161.2900 156.2100 129.5400 109.2200 146.4000 148.5900 147.3200 137.1600 125.7300
[31] 114.3000 147.9550 161.9250 146.0500 146.0500 152.7048 142.8750 142.8750 147.9550 160.6550
[41] 151.7650 162.8648 171.4500 147.3200 147.9550 144.7800 121.9200 128.9050 97.7900 154.3050
[51] 143.5100 146.7000 157.4800 127.0000 110.4900 97.7900 165.7350 152.4000 141.6050 158.8000
[61] 155.5750 164.4650 151.7650 161.2900 154.3050 145.4150 145.4150 152.4000 163.8300 144.1450
[71] 129.5400 129.5400 153.6700 142.8750 146.0500 167.0050 158.4198 91.4400 165.7350 149.8600
[81] 147.9550 137.7950 154.9400 160.9598 161.9250 147.9550 113.6650 159.3850 148.5900 136.5250
[91] 158.1150 144.7800 156.8450 179.0700 118.7450 170.1800 146.0500 147.3200 113.0300 162.5600
[101] 133.9850 152.4000 160.0200 149.8600 142.8750 167.0050 159.3850 154.9400 148.5900 111.1250
[111] 111.7600 162.5600 152.4000 124.4600 111.7600 86.3600 170.1800 146.0500 159.3850 151.1300
[121] 160.6550 169.5450 158.7500 74.2950 149.8600 153.0350 96.5200 161.9250 162.5600 149.2250
[131] 116.8400 100.0760 163.1950 161.9250 145.4150 163.1950 151.1300 150.4950 141.6050 170.8150
[141] 91.4400 157.4800 152.4000 149.2250 129.5400 147.3200 145.4150 121.9200 113.6650 157.4800
[151] 154.3050 120.6500 115.6000 167.0050 142.8750 152.4000 96.5200 160.0000 159.3850 149.8600
[161] 160.6550 160.6550 149.2250 125.0950 140.9700 154.9400 141.6050 160.0200 150.1648 155.5750
[171] 103.5050 94.6150 156.2100 153.0350 167.0050 149.8600 147.9550 159.3850 161.9250 155.5750
[181] 159.3850 146.6850 172.7200 166.3700 141.6050 142.8750 133.3500 127.6350 119.3800 151.7650
[191] 156.8450 148.5900 157.4800 149.8600 147.9550 102.2350 153.0350 160.6550 149.2250 114.3000
[201] 100.9650 138.4300 91.4400 162.5600 149.2250 158.7500 149.8600 158.1150 156.2100 148.5900
[211] 143.5100 154.3050 131.4450 157.4800 157.4800 154.3050 107.9500 168.2750 145.4150 147.9550
[221] 100.9650 113.0300 149.2250 154.9400 162.5600 156.8450 123.1900 161.0106 144.7800 143.5100
[231] 149.2250 110.4900 149.8600 165.7350 144.1450 157.4800 154.3050 163.8300 156.2100 153.6700
[241] 134.6200 144.1450 114.3000 162.5600 146.0500 120.6500 154.9400 144.7800 106.6800 146.6850
[251] 152.4000 163.8300 165.7350 156.2100 152.4000 140.3350 158.1150 163.1950 151.1300 171.1198
[261] 149.8600 163.8300 141.6050 93.9800 149.2250 105.4100 146.0500 161.2900 162.5600 145.4150
[271] 145.4150 170.8150 127.0000 159.3850 159.4000 153.6700 160.0200 150.4950 149.2250 127.0000
[281] 142.8750 142.1130 147.3200 162.5600 164.4650 160.0200 153.6700 167.0050 151.1300 147.9550
[291] 125.3998 111.1250 153.0350 139.0650 152.4000 154.9400 147.9550 143.5100 117.9830 144.1450
[301] 92.7100 147.9550 155.5750 150.4950 155.5750 154.3050 130.6068 101.6000 157.4800 168.9100
[311] 150.4950 111.7600 160.0200 167.6400 144.1450 145.4150 160.0200 147.3200 164.4650 153.0350
[321] 149.2250 160.0200 149.2250 85.0900 84.4550 59.6138 92.7100 111.1250 90.8050 153.6700
[331] 99.6950 62.4840 81.9150 96.5200 80.0100 150.4950 151.7650 140.6398 88.2650 158.1150
[341] 149.2250 151.7650 154.9400 123.8250 104.1400 161.2900 148.5900 97.1550 93.3450 160.6550
[351] 157.4800 167.0050 157.4800 91.4400 60.4520 137.1600 152.4000 152.4000 81.2800 109.2200
[361] 71.1200 89.2048 67.3100 85.0900 69.8500 161.9250 152.4000 88.9000 90.1700 71.7550
[371] 83.8200 159.3850 142.2400 142.2400 168.9100 123.1900 74.9300 74.2950 90.8050 160.0200
[381] 67.9450 135.8900 158.1150 85.0900 93.3450 152.4000 155.5750 154.3050 156.8450 120.0150
[391] 114.3000 83.8200 156.2100 137.1600 114.3000 93.9800 168.2750 147.9550 139.7000 157.4800
[401] 76.2000 66.0400 160.7000 114.3000 146.0500 161.2900 69.8500 133.9850 67.9450 150.4950
[411] 163.1950 148.5900 148.5900 161.9250 153.6700 68.5800 151.1300 163.8300 153.0350 151.7650
[421] 132.0800 156.2100 140.3350 158.7500 142.8750 84.4550 151.9428 161.2900 127.9906 160.9852
[431] 144.7800 132.0800 117.9830 160.0200 154.9400 160.9852 165.9890 157.9880 154.9400 97.9932
[441] 64.1350 160.6550 147.3200 146.7000 147.3200 172.9994 158.1150 147.3200 124.9934 106.0450
[451] 165.9890 149.8600 76.2000 161.9250 140.0048 66.6750 62.8650 163.8300 147.9550 160.0200
[461] 154.9400 152.4000 62.2300 146.0500 151.9936 157.4800 55.8800 60.9600 151.7650 144.7800
[471] 118.1100 78.1050 160.6550 151.1300 121.9200 92.7100 153.6700 147.3200 139.7000 157.4800
[481] 91.4400 154.9400 143.5100 83.1850 158.1150 147.3200 123.8250 88.9000 160.0200 137.1600
[491] 165.1000 154.9400 111.1250 153.6700 145.4150 141.6050 144.7800 163.8300 161.2900 154.9000
[501] 161.3000 170.1800 149.8600 123.8250 85.0900 160.6550 154.9400 106.0450 126.3650 166.3700
[511] 148.2852 124.4600 89.5350 101.6000 151.7650 148.5900 153.6700 53.9750 146.6850 56.5150
[521] 100.9650 121.9200 81.5848 154.9400 156.2100 132.7150 125.0950 101.6000 160.6550 146.0500
[531] 132.7150 87.6300 156.2100 152.4000 162.5600 114.9350 67.9450 142.8750 76.8350 145.4150
[541] 162.5600 156.2100 71.1200 158.7500
d2 <- d[d$age >= 18,]
dens(d2$height)
curve(dnorm(x, 178, 20), from = 100, to = 250)
curve(dunif(x, 0, 50), from = -10, to = 60)
# 4.13
sample_mu <- rnorm(1e4, 178, 20)
sample_sigma <- runif(1e4, 0, 50)
prior_h <- rnorm(1e4, sample_mu, sample_sigma)
dens(prior_h)
## R code 4.14
mu.list <- seq( from=140, to=160 , length.out=200 )
sigma.list <- seq( from=4 , to=9 , length.out=200 )
post <- expand.grid( mu=mu.list , sigma=sigma.list )
post$LL <- sapply( 1:nrow(post) , function(i) sum( dnorm(
d2$height ,
mean=post$mu[i] ,
sd=post$sigma[i] ,
log=TRUE ) ) )
post$prod <- post$LL + dnorm( post$mu , 178 , 20 , TRUE ) +
dunif( post$sigma , 0 , 50 , TRUE )
post$prob <- exp( post$prod - max(post$prod) )
# 4.15
contour_xyz(post$mu, post$sigma, post$prob)
image_xyz(post$mu, post$sigma, post$prob)
# 4.17
sample.rows <- sample(1:nrow(post), size = 1e4, replace = TRUE, prob = post$prob)
sample.mu <- post$mu[sample.rows]
sample.sigma <- post$sigma[sample.rows]
plot(sample.mu, sample.sigma, cex=1, pch = 16, col = col.alpha(rangi2, 0.1))
dens(sample.mu)
dens(sample.sigma)
HPDI(sample.mu)
|0.89 0.89|
153.8693 155.1759
HPDI(sample.sigma)
|0.89 0.89|
7.266332 8.195980
d3 <- sample(d2$height, size = 20)
mu.list <- seq( from=150, to=170 , length.out=200 )
sigma.list <- seq( from=4 , to=20 , length.out=200 )
post2 <- expand.grid( mu=mu.list , sigma=sigma.list )
post2$LL <- sapply( 1:nrow(post2) , function(i)
sum( dnorm( d3 , mean=post2$mu[i] , sd=post2$sigma[i] ,
log=TRUE ) ) )
post2$prod <- post2$LL + dnorm( post2$mu , 178 , 20 , TRUE ) +
dunif( post2$sigma , 0 , 50 , TRUE )
post2$prob <- exp( post2$prod - max(post2$prod) )
sample2.rows <- sample( 1:nrow(post2) , size=1e4 , replace=TRUE ,
prob=post2$prob )
sample2.mu <- post2$mu[ sample2.rows ]
sample2.sigma <- post2$sigma[ sample2.rows ]
plot( sample2.mu , sample2.sigma , cex=0.5 ,
col=col.alpha(rangi2,0.1) ,
xlab="mu" , ylab="sigma" , pch=16 )
library(rethinking)
data("Howell1")
d <- Howell1
d2 <- d[d$age >= 18,]
flist <- alist(
height ~ dnorm(mu, sigma),
mu ~ dnorm(178, 20),
sigma ~ dunif(0, 50)
)
m4.1 <- map(flist, data = d2)
precis(m4.1)
m4.2 <- map(
alist(
height ~ dnorm(mu, sigma),
mu ~ dnorm(178, 0.1),
sigma ~ dunif(0, 50)
),
data = d2
)
precis(m4.2)
vcov(m4.1)
mu sigma
mu 0.1697426228 0.0002174044
sigma 0.0002174044 0.0849095905
diag(vcov(m4.1))
mu sigma
0.16974262 0.08490959
cov2cor(vcov(m4.1))
mu sigma
mu 1.000000000 0.001810901
sigma 0.001810901 1.000000000
library(rethinking)
post <- extract.samples(m4.1, n = 1e4)
head(post)
precis(post)
plot(post, cex = 0.5, pch = 16, col = col.alpha(rangi2, 0.5))
# load data again since it's a long way back
library(rethinking)
data("Howell1")
d <- Howell1
d2 <- d[d$age >= 18,]
# fit model
m4.3 <- map(
alist(
height ~ dnorm(mu, sigma),
mu <- a + b*weight,
a ~ dnorm(156, 100),
b ~ dnorm(0, 10),
sigma ~ dunif(0, 50)
),
data = d2
)
precis(m4.3, corr = TRUE)
mean(d2$weight.c)
[1] 7.670718e-16
m4.4 <- map(
alist(
height ~ dnorm(mu, sigma),
mu <- a + b*weight.c,
a ~ dnorm(178, 100),
b ~ dnorm(0, 10),
sigma ~ dunif(0, 50)
),
data = d2
)
precis(m4.4, corr = TRUE)
plot(height ~ weight, data = d2)
abline(a = coef(m4.3)["a"], b = coef(m4.3)["b"])
post <- extract.samples(m4.3)
post[1:5,]
N <- 10
dN <- d2[1:N, ]
mN <- map(
alist(
height ~ dnorm(mu, sigma),
mu <- a + b*weight,
a ~ dnorm(178, 100),
b ~ dnorm(0, 50),
sigma ~ dunif(0, 50)
), data = dN
)
# extract 20 samples from the posterio
post <- extract.samples(mN, n = 20)
# display raw data and sample size
plot(dN$weight, dN$height,
xlim = range(d2$weight), ylim = range(d2$height),
col = rangi2, xlab="weight", ylab="height")
mtext(concat("N = ", N))
# plot th elines with transparency
for (i in 1:20) abline(a = post$a[i], b = post$b[i], col=col.alpha("black", 0.3))
mu_at_50 <- post$a + post$b * 50
dens(mu_at_50, col = rangi2, lwd = 2, xlab = "mu|weight = 50")
HPDI(mu_at_50, prob = 0.89)
|0.89 0.89|
155.2193 159.1355
mu <- link(m4.3)
[ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(mu)
num [1:1000, 1:352] 157 157 157 157 157 ...
# define sequence of weights to compute predictions for
# these values will be on the horizontal axis
weight.seq <- seq( from=25 , to=70 , by=1 )
# use link to compute mu
# for each sample from posterior
# and for each weight in weight.seq
mu <- link( m4.3 , data=data.frame(weight=weight.seq) )
[ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(mu)
num [1:1000, 1:46] 136 135 137 137 135 ...
# use type="n" to hide raw data
plot(height ~ weight, d2, type = "n")
# loop over samples and plot each mu value
for(i in 1:100){
points(weight.seq, mu[i,], pch = 16, col = col.alpha(rangi2, 0.1))
}
mu.HPDI[1,]
[1] 135.0121 135.9807 136.9615 137.9563 138.9417 139.8796 140.8657 141.8278 142.8008 143.6850
[11] 144.6643 145.6734 146.6507 147.5503 148.5107 149.5080 150.5013 151.4315 152.3629 153.3082
[21] 154.2385 155.1276 155.9834 156.8893 157.7212 158.5835 159.4325 160.2964 161.1414 161.9686
[31] 162.7959 163.6612 164.5303 165.3620 166.3665 167.1012 167.9612 168.8176 169.6567 170.5118
[41] 171.3494 172.1193 172.9695 173.8270 174.6640 175.4926
# plot raw data
# fading out points to make line and interval more visible
plot(height ~ weight, data = d2, col=col.alpha(rangi2, 0.5))
# plot the MAP line, aka the mean mu for each weight
lines(weight.seq, mu.mean)
# plot a shaded region for the 89% HPDI
shade(mu.HPDI, weight.seq)
sim.height <- sim(m4.3, data = list(weight = weight.seq))
[ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(sim.height)
num [1:1000, 1:46] 142 132 140 135 138 ...
height.PI <- apply(sim.height, 2, PI, prob=0.89)
# plot raw data
plot( height ~ weight , d2 , col=col.alpha(rangi2,0.5) )
# draw MAP line
lines( weight.seq , mu.mean )
# draw HPDI region for line
shade( mu.HPDI , weight.seq )
# draw PI region for simulated heights
shade( height.PI , weight.seq )
d$weight.s2 <- d$weight.s^2
m4.5 <- map(
alist(
height ~ dnorm( mu , sigma ) ,
mu <- a + b1*weight.s + b2*weight.s2 ,
a ~ dnorm( 178 , 100 ) ,
b1 ~ dnorm( 0 , 10 ) ,
b2 ~ dnorm( 0 , 10 ) ,
sigma ~ dunif( 0 , 50 )
) ,
data=d )
weight.seq <- seq(from = -2.2, to = 2, length.out = 30)
pred_dat <- list(weight.s = weight.seq, weight.s2 = weight.seq ^ 2)
mu <- link(m4.5, data = pred_dat)
[ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
mu.mean <- apply(mu, 2, mean)
mu.PI <- apply(mu, 2, PI, prob = 0.89)
sim.height <- sim(m4.5, data = pred_dat)
[ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
height.PI <- apply(sim.height, 2, PI, prob = 0.89)
plot(height ~ weight.s, d, col = col.alpha(rangi2, 0.5))
lines(weight.seq, mu.mean)
shade(mu.PI, weight.seq)
shade(height.PI, weight.seq)
d$weight.s3 <- d$weight.s^3
m4.6 <- map(
alist(
height ~ dnorm( mu , sigma ) ,
mu <- a + b1*weight.s + b2*weight.s2 + b3*weight.s3 ,
a ~ dnorm( 178 , 100 ) ,
b1 ~ dnorm( 0 , 10 ) ,
b2 ~ dnorm( 0 , 10 ) ,
b3 ~ dnorm( 0 , 10 ) ,
sigma ~ dunif( 0 , 50 )
) ,
data=d )
weight.seq <- seq(from = -2.2, to = 2, length.out = 30)
pred_dat <- list(weight.s = weight.seq,
weight.s2 = weight.seq^2,
weight.s3 = weight.seq^3)
mu <- link(m4.6, data = pred_dat)
[ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
mu.mean <- apply(mu, 2, mean)
mu.PI <- apply(mu, 2, PI, prob = 0.89)
sim.height <- sim(m4.6, data = pred_dat)
[ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
height.PI <- apply(sim.height, 2, PI, prob = 0.89)
plot(height ~ weight.s, d, col = col.alpha(rangi2, 0.5))
lines(weight.seq, mu.mean)
shade(mu.PI, weight.seq)
shade(height.PI, weight.seq)
plot(height ~ weight.s, d, col = col.alpha(rangi2, 0.5), xaxt = "n")
at <- c(-2, -1, 0, 1, 2)
labels <- at*sd(d$weight) + mean(d$weight)
axis(side = 1, at = at, labels = round(labels, 1))
The first line
Two
p(mu, sigma | y = Pi Normal(hi|mu, sigma)Normal(mu | 0, 10) Uniform( sigma | 0, 10))/…
The second line
Three: a, b, s
\(x=2\)
\[\begin{align} $y_{i} \sim \text{Normal}(\mu, \sigma)$ $\mu_{i} = \alpha + \beta x_{i}$ $\alpha \sim \text{Normal}(0, 50)$ $\beta \sim \text{Uniform}(0, 10)$ $\sigma \sim \text{Uniform}(0, 50)$ \end{align}\]